#### Produce graphs describing the cross-validation results

### Prepare environment

setwd("~/research/coffee-dataverse/src")

library(ggplot2)
library(dplyr)

### Load and prepare results data

results <- read.csv("../data/crossval.csv", colClasses=c('character', 'character'))
results$include.below0 <- as.logical(results$include.below0)
results$include.tmin <- as.logical(results$include.tmin)
results$include.tmax <- as.logical(results$include.tmax)
results$include.tavg <- as.logical(results$include.tavg)
results$include.gedd <- as.logical(results$include.gedd)
results$include.prcp <- as.logical(results$include.prcp)
results$threshold.gdd <- as.numeric(as.character(results$threshold.gdd))
results$threshold.edd <- as.numeric(as.character(results$threshold.edd))
results$rmse <- as.numeric(as.character(results$rmse))

monthsuffs <- c(paste0('0', 1:9, '.1'), paste0(10:12, '.1'), paste0('0', 1:9), 10:12)

resord <- results[order(results$rmse),]

### Identify the preferred models

ggplot(resord, aes(1:nrow(resord), rmse)) +
    geom_line() + geom_vline(alpha=.5, aes(xintercept=which.min(resord$rmse + !resord$include.prcp), colour="Alt. spec.")) +
    geom_vline(alpha=.5, aes(xintercept=which.min(resord$rmse + as.numeric(!resord$include.prcp) + as.numeric(resord$include.tmax)), colour="Main spec.")) +
    scale_y_continuous(expand=c(0, 0)) + theme_bw() + scale_colour_discrete(name="Specification")
ggsave("../figures/xval/spec-scale-big.pdf", width=6.5, height=5)

ggplot(subset(results, !include.below0 & include.tmin & !include.tmax & !include.tavg & include.gedd & include.prcp & threshold.gdd == 10 & threshold.edd == 30)  %>% group_by(month1, month2) %>% summarize(rmse=mean(rmse)), aes(month1, month2, fill=rmse)) +
    geom_raster() + geom_point(x='05.1', y='08.1', shape=23, size=4, fill='white') +
    scale_fill_gradient("Cross-validated\nRMSE", low='#f7fcb9', high='#f03b20') +
    scale_x_discrete("Starting month", labels=gsub("\\.1", "-1", monthsuffs)) +
    scale_y_discrete("Ending month", labels=gsub("\\.1", "-1", monthsuffs)) + theme_bw()
ggsave("../figures/xval/preferred-vs-months.png", height=6.5, width=8.5)

ggplot(subset(results, month1 == '05.1' & month2 == '08.1' & !include.below0 & include.tmin & !include.tmax & !include.tavg & include.gedd & include.prcp), aes(threshold.gdd, threshold.edd, fill=rmse)) +
    geom_raster() + geom_point(x=10, y=30, shape=23, size=4, fill='white') +
    scale_fill_gradient("Cross-validated\nRMSE", low='#f7fcb9', high='#f03b20') +
xlab("GDD Lower limit (C)") + ylab("EDD Lower limit (C)") + theme_bw()
ggsave("../figures/xval/preferred-vs-gedd.png", height=5, width=6.5)

allspec <- subset(results, month1 == '05.1' & month2 == '08.1' & (!include.gedd | (threshold.gdd == 10 & threshold.edd == 30)) & !include.below0)
allspec$rank <- rank(allspec$rmse)
allspec$title <- sapply(1:nrow(allspec), function(ii) {
    allvars <- c(ifelse(allspec$include.below0[ii], 'Below0', NA),
                 ifelse(allspec$include.tmin[ii], 'Tmin', NA),
                 ifelse(allspec$include.tmax[ii], 'Tmax', NA),
                 ifelse(allspec$include.tavg[ii], 'Tavg', NA),
                 ifelse(allspec$include.gedd[ii], 'GDD+EDD', NA),
                 ifelse(allspec$include.prcp[ii], 'Prcp.', NA))
    allvars <- allvars[!is.na(allvars)]
    allvars[length(allvars) == 0] <- "(None)"
    paste(allvars, collapse=" + ")
})

allspec$title <- factor(allspec$title, levels=allspec$title[order(allspec$rmse)])

ggplot(allspec, aes(title, rmse)) +
    geom_point(aes(colour=title == "Tmin + GDD+EDD + Prcp.")) +
    theme_bw() + coord_flip() +
    theme(axis.text.x=element_text(angle=90, vjust=.5, hjust=1)) +
    guides(colour="none") + ylab("Cross-validated RMSE") + xlab(NULL)
ggsave("../figures/xval/spec-scale.pdf", width=6.5, height=3)
